package com.xiaojiezhu.spark.rdd2

import org.ansj.domain.Term
import org.ansj.splitWord.analysis.ToAnalysis
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.JavaConverters._

/**
  * Word count over a Chinese text file, ordered by occurrence count descending.
  *
  * Each line is tokenized with the ansj segmenter ([[ToAnalysis]]), tokens are
  * counted, and the (count, word) pairs are written out sorted by frequency,
  * highest first.
  */
object ScalaWordCount {

  /**
    * Entry point. Runs the whole pipeline on a local-mode Spark context.
    *
    * @param args optional overrides: args(0) = input path, args(1) = output path.
    *             When absent, the original hard-coded paths are used, so existing
    *             invocations keep working unchanged.
    */
  def main(args :Array[String]): Unit ={
    val inputPath  = if (args.length > 0) args(0) else "/home/zxj/data/test/all.txt"
    val outputPath = if (args.length > 1) args(1) else "/home/zxj/data/test/result"

    val conf = new SparkConf().setMaster("local").setAppName("wordcount")
    val sc = new SparkContext(conf)
    try {
      val rdd = sc.textFile(inputPath)
      // Segment each line into words. asScala keeps the elements statically
      // typed as Term, avoiding the untyped toArray() + classOf[Term].cast dance.
      val words = rdd.flatMap(line => ToAnalysis.parse(line).getTerms.asScala.map(_.getName))
      // Classic word count: (word, 1) pairs reduced by key.
      val r1 = words.map((_, 1)).reduceByKey(_ + _)
      // Swap to (count, word) so sortByKey can order by frequency.
      val r2 = r1.map { case (word, n) => (n, word) }
      // Highest counts first.
      val result = r2.sortByKey(ascending = false)
      result.saveAsTextFile(outputPath)
    } finally {
      // Release Spark resources even if the job fails.
      sc.stop()
    }
  }
}
